3 General statistics
3.1 Sample statistics
3.1.4 Origin of samples (Figure S1)
# Figure S1: world map of sample capture locations, coloured by taxonomic order.

# Build the world polygons once; geom_map() below receives the same table as
# both its `data` and its `map` argument.
world_map <- map_data("world")

read_tsv("data/sample.tsv") %>%
  # Keep only the identifier, taxonomy and coordinate columns
  select(
    sample_id,
    specimen_species,
    specimen_order,
    specimen_class,
    capture_latitude,
    capture_longitude
  ) %>%
  # Add Gaussian noise (sd = 0.5, in the coordinates' own units) so that
  # samples sharing a capture location do not sit exactly on top of each other.
  # NOTE: no seed is set here, so point positions differ between renders.
  mutate(
    capture_latitude_jitter = capture_latitude + rnorm(n(), mean = 0, sd = 0.5),
    capture_longitude_jitter = capture_longitude + rnorm(n(), mean = 0, sd = 0.5)
  ) %>%
  # Plot map
  ggplot() +
  # Base layer: country polygons in light grey with thin white borders.
  # `linewidth` replaces `size` for line widths (ggplot2 >= 3.4.0).
  geom_map(
    data = world_map,
    map = world_map,
    aes(long, lat, map_id = region),
    color = "white", fill = "#cccccc", linewidth = 0.2
  ) +
  # Sample layer: one small semi-transparent dot per sample
  geom_point(
    aes(
      x = capture_longitude_jitter,
      y = capture_latitude_jitter,
      color = specimen_order
    ),
    alpha = 0.5, size = 0.5, shape = 16
  ) +
  labs(color = "Taxonomic order") +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "bottom"
  )

3.2 Data statistics
3.2.1 Total data
# Total, mean and standard deviation of the `bases_pre_fastp` column of the
# preprocessing table, reported in gigabases; rendered as a table with tt().
read_tsv("data/preprocessing.tsv") %>%
  mutate(gigabases = bases_pre_fastp / 1e9) %>% # 1e9 bases = 1 gigabase
  summarise(
    total = sum(gigabases, na.rm = TRUE),
    mean = mean(gigabases, na.rm = TRUE),
    sd = sd(gigabases, na.rm = TRUE)
  ) %>%
  tt()

| total | mean | sd |
|---|---|---|
| 14697.42 | 5.860216 | 5.044053 |
3.2.2 Quality-filtered data
# Total, mean and standard deviation of the `bases_post_fastp` column of the
# preprocessing table, reported in gigabases; rendered as a table with tt().
read_tsv("data/preprocessing.tsv") %>%
  mutate(gigabases = bases_post_fastp / 1e9) %>% # 1e9 bases = 1 gigabase
  summarise(
    total = sum(gigabases, na.rm = TRUE),
    mean = mean(gigabases, na.rm = TRUE),
    sd = sd(gigabases, na.rm = TRUE)
  ) %>%
  tt()

| total | mean | sd |
|---|---|---|
| 13486.81 | 5.377518 | 4.586315 |
3.2.3 Host genomic data
# Total, mean and standard deviation of the `host_bases` column of the
# preprocessing table, reported in gigabases; rendered as a table with tt().
read_tsv("data/preprocessing.tsv") %>%
  mutate(gigabases = host_bases / 1e9) %>% # 1e9 bases = 1 gigabase
  summarise(
    total = sum(gigabases, na.rm = TRUE),
    mean = mean(gigabases, na.rm = TRUE),
    sd = sd(gigabases, na.rm = TRUE)
  ) %>%
  tt()

| total | mean | sd |
|---|---|---|
| 5554.962 | 2.193903 | 3.710612 |
3.2.4 Metagenomic data
# Total, mean and standard deviation of the `metagenomic_bases` column of the
# preprocessing table, reported in gigabases; rendered as a table with tt().
read_tsv("data/preprocessing.tsv") %>%
  mutate(gigabases = metagenomic_bases / 1e9) %>% # 1e9 bases = 1 gigabase
  summarise(
    total = sum(gigabases, na.rm = TRUE),
    mean = mean(gigabases, na.rm = TRUE),
    sd = sd(gigabases, na.rm = TRUE)
  ) %>%
  tt()

| total | mean | sd |
|---|---|---|
| 7931.853 | 3.132643 | 3.272361 |